Data importation and cleaning

This data set was generated by Reddit user yuxbni76 (https://www.reddit.com/user/yuxbni76). The data was scraped from ESPN.com and covers the 2002 season through week 6 of the 2019 season. Three games were missing from the data set (oddly, all three fall near holidays).

I looked up the three missing games on https://www.pro-football-reference.com/ and added them to the data set before importing it.

# Load the game-level box scores from the CSV in the working directory, then
# print a per-column overview to check which columns were parsed as numbers
# and which were read as text.
nfl_stats <- read.csv(file = "nfl_dataset_2002-2019week6.csv")
summary(object = nfl_stats)
##      date               away               home           first_downs_away
##  Length:4631        Length:4631        Length:4631        Min.   : 3.00   
##  Class :character   Class :character   Class :character   1st Qu.:15.00   
##  Mode  :character   Mode  :character   Mode  :character   Median :19.00   
##                                                           Mean   :18.78   
##                                                           3rd Qu.:22.00   
##                                                           Max.   :37.00   
##  first_downs_home third_downs_away   third_downs_home   fourth_downs_away 
##  Min.   : 3.00    Length:4631        Length:4631        Length:4631       
##  1st Qu.:16.00    Class :character   Class :character   Class :character  
##  Median :20.00    Mode  :character   Mode  :character   Mode  :character  
##  Mean   :19.78                                                            
##  3rd Qu.:23.00                                                            
##  Max.   :40.00                                                            
##  fourth_downs_home  passing_yards_away passing_yards_home rushing_yards_away
##  Length:4631        Min.   : -7.0      Min.   :  6.0      Min.   :-18.0     
##  Class :character   1st Qu.:164.0      1st Qu.:172.0      1st Qu.: 73.0     
##  Mode  :character   Median :217.0      Median :221.0      Median :103.0     
##                     Mean   :219.9      Mean   :226.6      Mean   :109.7     
##                     3rd Qu.:273.0      3rd Qu.:276.0      3rd Qu.:139.0     
##                     Max.   :516.0      Max.   :522.0      Max.   :351.0     
##  rushing_yards_home total_yards_away total_yards_home comp_att_away     
##  Min.   : -3.0      Min.   : 26.0    Min.   : 77.0    Length:4631       
##  1st Qu.: 81.0      1st Qu.:270.0    1st Qu.:286.0    Class :character  
##  Median :112.0      Median :329.0    Median :343.0    Mode  :character  
##  Mean   :117.8      Mean   :329.6    Mean   :344.4                      
##  3rd Qu.:148.0      3rd Qu.:389.0    3rd Qu.:400.0                      
##  Max.   :378.0      Max.   :643.0    Max.   :653.0                      
##  comp_att_home       sacks_away         sacks_home        rushing_attempts_away
##  Length:4631        Length:4631        Length:4631        Min.   : 6.00        
##  Class :character   Class :character   Class :character   1st Qu.:21.00        
##  Mode  :character   Mode  :character   Mode  :character   Median :26.00        
##                                                           Mean   :26.59        
##                                                           3rd Qu.:32.00        
##                                                           Max.   :57.00        
##  rushing_attempts_home  fumbles_away     fumbles_home      int_away     
##  Min.   : 6.00         Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:22.00         1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :28.00         Median :0.0000   Median :0.000   Median :1.0000  
##  Mean   :27.83         Mean   :0.6597   Mean   :0.653   Mean   :0.9836  
##  3rd Qu.:33.00         3rd Qu.:1.0000   3rd Qu.:1.000   3rd Qu.:2.0000  
##  Max.   :60.00         Max.   :5.0000   Max.   :4.000   Max.   :6.0000  
##     int_home     turnovers_away  turnovers_home  penalties_away    
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Length:4631       
##  1st Qu.:0.000   1st Qu.:1.000   1st Qu.:1.000   Class :character  
##  Median :1.000   Median :1.000   Median :1.000   Mode  :character  
##  Mean   :0.916   Mean   :1.643   Mean   :1.569                     
##  3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000                     
##  Max.   :6.000   Max.   :8.000   Max.   :7.000                     
##  penalties_home     redzone_away       redzone_home        drives_away   
##  Length:4631        Length:4631        Length:4631        Min.   : 0.00  
##  Class :character   Class :character   Class :character   1st Qu.:11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :12.00  
##                                                           Mean   :12.48  
##                                                           3rd Qu.:14.00  
##                                                           Max.   :26.00  
##   drives_home   def_st_td_away   def_st_td_home   possession_away   
##  Min.   : 0.0   Min.   :0.0000   Min.   :0.0000   Length:4631       
##  1st Qu.:11.0   1st Qu.:0.0000   1st Qu.:0.0000   Class :character  
##  Median :12.0   Median :0.0000   Median :0.0000   Mode  :character  
##  Mean   :12.4   Mean   :0.3468   Mean   :0.3701                     
##  3rd Qu.:14.0   3rd Qu.:0.0000   3rd Qu.:0.0000                     
##  Max.   :25.0   Max.   :6.0000   Max.   :6.0000                     
##  possession_home      score_away      score_home  
##  Length:4631        Min.   : 0.00   Min.   : 0.0  
##  Class :character   1st Qu.:14.00   1st Qu.:16.0  
##  Mode  :character   Median :20.00   Median :23.0  
##                     Mean   :20.82   Mean   :23.3  
##                     3rd Qu.:27.00   3rd Qu.:30.0  
##                     Max.   :59.00   Max.   :62.0
# Store categorical columns as factors
## The home and away team names are the categorical columns; convert both in
## a single pass.
nfl_stats[c("home", "away")] <- lapply(nfl_stats[c("home", "away")], as.factor)

Team colors

Team colors were taken from https://teamcolorcodes.com. I used the first primary color listed for each team and stored the colors in a named vector for later use. For the Browns and Titans I used the secondary color instead, as it seemed more appropriate.

# Named character vector mapping each team name to a hex colour. The names
# are the keys that scale_fill_manual() / scale_color_manual() match against
# the values of the `home` and `away` columns.
Team_colors <- c(
  "49ers" = "#AA0000",
  "Bears" = "#0B162A",
  "Bengals" = "#FB4F14",
  "Bills" = "#00338D",
  "Broncos" = "#FB4F14",
  "Browns" = "#FF3C00",
  "Buccaneers" = "#D50A0A",
  "Cardinals" = "#97233F",
  "Chargers" = "#0080C6",
  "Chiefs" = "#E31837",
  "Colts" = "#002C5F",
  "Cowboys" = "#041E42",
  "Dolphins" = "#008E97",
  "Eagles" = "#004C54",
  "Falcons" = "#A71930",
  "Giants" = "#0B2265",
  "Jaguars" = "#006778",
  "Jets" = "#125740",
  "Lions" = "#0076B6",
  "Packers" = "#203731",
  "Panthers" = "#0085CA",
  "Patriots" = "#002244",
  "Raiders" = "#000000",
  "Rams" = "#003594",
  "Ravens" = "#241773",
  "Redskins" = "#773141",
  "Saints" = "#D3BC8D",
  "Seahawks" = "#002244",
  "Steelers" = "#FFB612",
  "Texans" = "#03202F",
  "Titans" = "#4B92DB",
  "Vikings" = "#4F2683"
)

Simple plots

# Games hosted per team: one bar per home team, filled with that team's
# colour. The legend is hidden and the team names on the x axis are rotated
# 90 degrees.
A <- ggplot(data = nfl_stats, mapping = aes(x = home, fill = home)) +
  geom_bar() +
  scale_fill_manual(values = Team_colors) +
  labs(title = "Home Games", y = "# Games") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "none"
  )

# Same chart for games played on the road, keyed on the away team.
B <- ggplot(data = nfl_stats, mapping = aes(x = away, fill = away)) +
  geom_bar() +
  scale_fill_manual(values = Team_colors) +
  labs(title = "Away Games", y = "# Games") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "none"
  )

# Show both charts in one figure.
ggarrange(A, B)

# Home record by team ----
# Tallies every team's home games into wins, losses and ties. A tied game is
# neither a win nor a loss, so it gets its own column instead of being folded
# into Losses (which is what `Games - Wins` would do).
Teams_32.list <- levels(nfl_stats$home)

# Classify each game from the home side's point of view using the sign of the
# point margin: positive = home win, negative = home loss, zero = tie.
home_result <- factor(
  sign(nfl_stats$score_home - nfl_stats$score_away),
  levels = c(1, -1, 0),
  labels = c("Wins", "Losses", "Ties")
)

# Team-by-result contingency table. Rows follow the factor levels of `home`,
# i.e. the same order as Teams_32.list.
home_record <- table(nfl_stats$home, home_result)

# One row per team, with the team name as both the row name and the `Teams`
# column. `Ties` is appended last so the positions of the pre-existing
# columns (Games, Wins, Losses, Teams) do not shift.
Home_wins_n_losses <- data.frame(
  Games = as.vector(table(nfl_stats$home)),
  Wins = as.vector(home_record[, "Wins"]),
  Losses = as.vector(home_record[, "Losses"]),
  Teams = Teams_32.list,
  Ties = as.vector(home_record[, "Ties"]),
  row.names = Teams_32.list,
  stringsAsFactors = FALSE
)

# Long format for plotting: drop the Games total (first column) and stack the
# result counts into `variable` / `value` pairs keyed by team.
long <- reshape2::melt(Home_wins_n_losses[, -1], id.vars = "Teams")

# Stacked bar per team; each bar's height is that team's number of home games.
ggplot(long, aes(x = Teams, y = value, fill = variable)) +
  geom_col() +
  ylab("# Games") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Passing Yards

# Running game index used as the x axis.
# NOTE(review): this treats row order as chronological order -- confirm the
# CSV is sorted by date.
nfl_stats$game_number <- seq_len(nrow(nfl_stats))

# Season shading ----
# Seasons are treated as consecutive runs of 267 rows, the spacing the shaded
# bands have always used (presumably 256 regular-season + 11 playoff games --
# TODO confirm). Every other season, starting with the first, gets a grey
# band; the seasons in between are left unshaded.
games_per_season <- 267
first_season <- 2002
n_games <- nrow(nfl_stats)

# Zero-based row offset at which each season starts.
season_starts <- seq(0, max(n_games - 1, 0), by = games_per_season)
shaded_starts <- season_starts[c(TRUE, FALSE)]

# Clamp the band edges to the plotted range [1, n_games].
band_xmin <- pmax(shaded_starts, 1)
band_xmax <- pmin(shaded_starts + games_per_season, n_games)

# Label only the first and the last shaded season, centred on their bands.
labelled <- unique(c(1, length(shaded_starts)))
label_x <- floor((band_xmin[labelled] + band_xmax[labelled]) / 2)
label_text <- as.character(
  first_season + shaded_starts[labelled] / games_per_season
)

# All teams
# One smoothed passing-yards trend per home team, coloured by team, with the
# season bands and the two season labels drawn on top.
X <- ggplot(
  nfl_stats,
  aes(x = game_number, y = passing_yards_home, color = home)
) +
  geom_smooth(se = FALSE) +
  scale_color_manual(values = Team_colors) +
  annotate(
    "rect",
    fill = "black", alpha = 0.25,
    xmin = band_xmin, xmax = band_xmax, ymin = -Inf, ymax = Inf
  ) +
  annotate("text", x = label_x, y = 310, label = label_text, vjust = -0.5)
X
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggsave("Passing_yards_home2002-2018.png", X, width = 6, height = 6)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Interactive passing-yards charts ----
# Each chart draws a per-team smoother with the individual games as points on
# top. The game date and both scores are mapped to label aesthetics, which
# are the fields named in the ggplotly() tooltip lists below.

# Home teams' passing yards.
P <- ggplot(
  data = nfl_stats,
  mapping = aes(
    x = game_number, y = passing_yards_home, color = home,
    label = date, label2 = score_home, label3 = score_away
  )
) +
  geom_smooth(se = FALSE) +
  geom_point() +
  scale_color_manual(values = Team_colors)

# Away teams' passing yards.
Q <- ggplot(
  data = nfl_stats,
  mapping = aes(
    x = game_number, y = passing_yards_away, color = away,
    label = date, label2 = score_home, label3 = score_away
  )
) +
  geom_smooth(se = FALSE) +
  geom_point() +
  scale_color_manual(values = Team_colors)

ggplotly(
  p = P,
  tooltip = c("passing_yards_home", "home", "date", "score_home", "score_away")
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplotly(
  p = Q,
  tooltip = c("passing_yards_away", "away", "date", "score_home", "score_away")
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

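# Disabled chunk (kept for reference): AFC West passing and rushing trends.
# NOTE: `home == c(...)` recycles the four names element-wise against the
# column instead of testing membership; `%in%` is probably what was intended
# if this chunk is ever re-enabled.
#
# ```{r}
# # AFC WEST
# ggplot(nfl_stats[nfl_stats$home == c("Chiefs", "Raiders", "Chargers", "Broncos"), ],
#        aes(x = game_number, y = passing_yards_home, color = home)) +
#   geom_smooth(se = FALSE) + scale_color_manual(values = Team_colors)

# ggplot(nfl_stats[nfl_stats$home == c("Chiefs", "Raiders", "Chargers", "Broncos"), ],
#        aes(x = game_number, y = rushing_yards_home, color = home)) +
#   geom_smooth(se = FALSE) + scale_color_manual(values = Team_colors)
# ```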